{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Market Research Systematic Quantitative Experimentation 1: Controversial product ideas\n",
    "\n",
    "In this notebook, we run several market research simulation experiments in order to compute quantitative metrics and compare Control to Treatment conditions."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pprint import pprint\n",
    "\n",
    "from tinytroupe.agent import TinyPerson\n",
    "from tinytroupe.environment import TinyWorld\n",
    "from tinytroupe.utils.parallel import parallel_map_dict, parallel_map_cross\n",
    "\n",
    "from tinytroupe.experimentation import InPlaceExperimentRunner\n",
    "\n",
    "from tinytroupe.validation import persona_adherence, self_consistency, fluency, task_completion, divergence\n",
    "\n",
    "# specific utilities\n",
    "from common_utils import *\n",
    "from market_research_utils import *"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Parameters"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "full_mode = False  # set to True to run the full mode with all agents and tasks"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# the same number of simulation steps is used in both modes\n",
    "simulation_steps = 1\n",
    "\n",
    "if not full_mode:\n",
    "    # reduced run: fewer repetitions, plus caps on how many agents and proposals are used\n",
    "    repetitions_per_task = 2\n",
    "    qty_agents = 10\n",
    "    qty_proposals = 2\n",
    "else:\n",
    "    # full run: more repetitions, no caps are defined\n",
    "    repetitions_per_task = 5\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Auxiliary functions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "def market_research_battery(agents, proposals, agent_propositions, environment_propositions, \n",
    "                            repetitions=1, simulation_steps=10): \n",
    "    \"\"\"\n",
    "    Runs simulated market research sessions for each proposal and scores the outcome.\n",
    "\n",
    "    For every proposal, `repetitions` sessions are run. Each session creates a new TinyWorld with\n",
    "    the given agents, clears their episodic memories, briefs them, broadcasts the main YES/NO\n",
    "    question, runs the world for `simulation_steps` steps, broadcasts a follow-up question and\n",
    "    runs one more step. After each session, every environment proposition is scored against the\n",
    "    world and every agent proposition is scored against every agent.\n",
    "\n",
    "    Args:\n",
    "        agents (list): the agents to be interviewed; all of them take part in every session.\n",
    "        proposals (list): the product/service proposals to ask about.\n",
    "        agent_propositions (dict): metric name -> proposition, scored once per agent via\n",
    "            `score(agent, return_full_response=True)`.\n",
    "        environment_propositions (dict): metric name -> proposition, scored once per session via\n",
    "            `score(world, claim_variables=..., return_full_response=True)`.\n",
    "        repetitions (int): number of sessions to run per proposal.\n",
    "        simulation_steps (int): number of steps to run after the main question is broadcast.\n",
    "\n",
    "    Returns:\n",
    "        tuple: (agent_propositions_scores, environment_propositions_scores), each a dict mapping\n",
    "            a metric name to the list of result \"value\" entries collected over all sessions.\n",
    "    \"\"\"\n",
    "    agent_propositions_scores = {}\n",
    "    environment_propositions_scores = {}\n",
    "\n",
    "    print(\"Proposals:\", proposals)\n",
    "\n",
    "    experiments_count = 0\n",
    "    # all agents share one session, so the agent count does not multiply the number of sessions\n",
    "    total_expected_experiments = len(proposals) * repetitions\n",
    "\n",
    "    # loop over proposals\n",
    "    for proposal in proposals:\n",
    "        for i in range(repetitions):\n",
    "            print(\"\\n############## STARTING A NEW RESEARCH SESSION #################\")\n",
    "            print(f\"Overall experiment number: {experiments_count+1} / {total_expected_experiments}\")\n",
    "            print(f\"Proposal: {proposal}\")\n",
    "            print(f\"Trial number: {i+1}\")\n",
    "            print(f\"Customers: {agents}\")\n",
    "\n",
    "            world = TinyWorld(agents= agents, broadcast_if_no_target=False)\n",
    "\n",
    "            # prepare customers: first clear the episodic memory of all agents, exactly once.\n",
    "            # This must happen BEFORE the briefing loop below: clearing inside that loop would\n",
    "            # be repeated after each briefing and leave only the last customer briefed.\n",
    "            for person in world.agents:\n",
    "                person.clear_episodic_memory()\n",
    "\n",
    "            # now brief every customer\n",
    "            for customer in agents:\n",
    "                customer.listen(\\\n",
    "                    \"\"\"\n",
    "                    You are going to be interviewed for a market research about a new product or service.\n",
    "                    Wait for the questions and answer them honestly. Please stay quiet until the you are asked something.\n",
    "                    \"\"\"\n",
    "                    )\n",
    "            \n",
    "            # prepare the researcher\n",
    "            interviewer_main_question =\\\n",
    "                f\"\"\" \n",
    "                We are developing a new product/service proposal, and would like to know if you would buy it or not.\n",
    "                The proposal is the following:\n",
    "\n",
    "                    \"{proposal}\"\n",
    "                \n",
    "                The question is:  would buy this product/service, if it were available in the market?\n",
    "                You **must** respond with YES or NO **only**, and explain why. If you are not sure, please pick the answer that \n",
    "                you think is most likely. A YES or NO answer is better than a \"I don't know\" answer or a \"maybe\" answer.\n",
    "\n",
    "                In your response, also explain WHY you are saying YES or NO. Please consider all of your\n",
    "                particularities, don't give just a general justifcation, but instead dig deep into your own preferences,\n",
    "                personality, style, behaviors, occupation, emotions, past history, etc. \n",
    "                We want a detailed and highly personalized justification.\n",
    "\n",
    "                Please be honest, we are not here to judge you, but just to learn from you. \n",
    "                We know your preferences and choices depend on many factors, but please make your best guess.\n",
    "                To do so, reflect deeply about your personality, interests, preferences, finances, emotions, etc., in order\n",
    "                to provide a good answer. Take the time to think before talking.\n",
    "\n",
    "                Now please answer the question.\n",
    "                \"\"\"\n",
    "\n",
    "            # main question\n",
    "            world.broadcast(interviewer_main_question)\n",
    "            world.run(simulation_steps)\n",
    "\n",
    "            # follow-up question\n",
    "            world.broadcast(\"Can you please elaborate more on your answer? Would you have suggestions to make this product/service better?\")\n",
    "            world.run(1)\n",
    "            experiments_count += 1\n",
    "\n",
    "            # Evaluate environment propositions in parallel\n",
    "            env_results = parallel_map_dict(\n",
    "                environment_propositions,\n",
    "                lambda item: item[1].score(\n",
    "                    world, \n",
    "                    claim_variables={\"task_description\": f\"A market research session was run about: {proposal}.\"}, \n",
    "                    return_full_response=True\n",
    "                )\n",
    "            )\n",
    "            \n",
    "            # Process environment results\n",
    "            for k, result in env_results.items():\n",
    "                environment_propositions_scores.setdefault(k, []).append(result[\"value\"])\n",
    "                print(result)\n",
    "\n",
    "            # Evaluate agent propositions across all agents in parallel\n",
    "            agent_results = parallel_map_cross(\n",
    "                [agents, agent_propositions.items()],\n",
    "                lambda agent, prop_item: (\n",
    "                    prop_item[0],  # proposition key\n",
    "                    prop_item[1].score(agent, return_full_response=True)  # result\n",
    "                )\n",
    "            )\n",
    "            \n",
    "            # Process agent results\n",
    "            for k, result in agent_results:\n",
    "                agent_propositions_scores.setdefault(k, []).append(result[\"value\"])\n",
    "                print(result)\n",
    "\n",
    "    return agent_propositions_scores, environment_propositions_scores"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Experiment setup"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2025-04-09 19:50:37,484 - tinytroupe - WARNING - Configuration file './market_research_quantitative_experimentation_6.json' exists and was loaded successfully. If you are trying to fully rerun the experiments, delete it first.\n"
     ]
    }
   ],
   "source": [
    "experiment_runner = InPlaceExperimentRunner(\"./market_research_quantitative_experimentation_1.json\")\n",
    "\n",
    "experiment_runner.add_experiment(\"Control\")\n",
    "experiment_runner.add_experiment(\"Treatment\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "experiment_runner.activate_next_experiment()\n",
    "\n",
    "#experiment_runner.fix_active_experiment(\"Control\")\n",
    "#experiment_runner.fix_active_experiment(\"Treatment\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Running experiment None\n"
     ]
    }
   ],
   "source": [
    "print(f\"Running experiment {experiment_runner.get_active_experiment()}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Agents and populations"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "# display the communication while simulating; set this to False to make the output cleaner for eval\n",
    "TinyPerson.communication_display = True\n",
    "\n",
    "people = []\n",
    "if not experiment_runner.has_finished_all_experiments():\n",
    "    # load agents\n",
    "    people = TinyPerson.load_specifications_from_folder(\"./population/difficult_people\")\n",
    "\n",
    "    # filter to make it go faster?\n",
    "    if not full_mode:\n",
    "        people = people[:qty_agents]\n",
    "\n",
    "    # customize and print minibios \n",
    "    for person in people:\n",
    "        ##person.import_fragment(\"./fragments/picky_customer.agent.fragment.json\")\n",
    "        print(person.minibio())\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(people)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# partition people into consecutive groups of at most 5\n",
    "people_groups = [people[start:start+5] for start in range(0, len(people), 5)]\n",
    "\n",
    "len(people_groups)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The experiment refers to customers: each condition sets options on the agents' action generators\n",
    "\n",
    "if experiment_runner.get_active_experiment() == \"Control\":\n",
    "    action_generator_settings = {\n",
    "        \"enable_reasoning_step\": False,\n",
    "        \"enable_quality_checks\": False,\n",
    "    }\n",
    "\n",
    "elif experiment_runner.get_active_experiment() == \"Treatment\":\n",
    "    action_generator_settings = {\n",
    "        \"enable_reasoning_step\": False,\n",
    "        \"enable_quality_checks\": True,\n",
    "        \"max_attempts\": 3,\n",
    "        \"enable_regeneration\": True,\n",
    "        \"quality_threshold\": 5,\n",
    "    }\n",
    "\n",
    "else:\n",
    "    # no active Control/Treatment experiment: leave the agents untouched\n",
    "    action_generator_settings = {}\n",
    "\n",
    "# apply the selected settings, in the order listed above, to every agent\n",
    "for person in people:\n",
    "    for option, value in action_generator_settings.items():\n",
    "        setattr(person.action_generator, option, value)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Product and service proposals"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "# controversial proposals for the market research, to stress the agents\n",
    "proposals = [\n",
    "    \"Genetic matchmaking app - Matches people for relationships based on DNA compatibility and predicted offspring traits.\",\n",
    "    \"Subscription service for guaranteed restaurant reservations at peak times - Skips the line, even if it means displacing other reservations.\",\n",
    "    \"AI-powered parenting assistant - Uses cameras and data to recommend (or correct) parenting choices in real-time.\",\n",
    "    \"Home lie detector system - A smart device that gives subtle cues when someone in your house might be lying.\",\n",
    "    \"Social credit scoring app for friend groups - Tracks reliability, helpfulness, and mood scores across private circles.\",\n",
    "    \"Remote job 'sniping' platform - Bids on and replaces underperforming remote workers mid-project based on performance analytics.\",\n",
    "    \"Implantable mood stabilizer chip - Provides emotion regulation via neural stimulation; optional subscription to adjust 'emotional settings.'\",\n",
    "    \"Premium sidewalk access - A city initiative allowing people to pay for cleaner, wider, and less-crowded walking lanes.\",\n",
    "    \"AI therapist that reports serious mental issues to employers or authorities (with user agreement) - 'For your safety.'\",\n",
    "    \"Pre-paid 'apology insurance' - Automates and drafts apologies with gifts for personal or professional transgressions.\",\n",
    "    \"Smart surveillance pets - Robotic dogs or cats that double as cute companions and full-house monitoring devices.\",\n",
    "    \"Elder care optimization tool - Uses biometric data to determine if it's 'worth it' to continue certain treatments (and suggests alternatives).\",\n",
    "    \"Child ranking platform for parents - AI scores each child's potential based on behavior, learning pace, and biometric data.\",\n",
    "    \"Real-time attractiveness rating mirror - Public mirror that displays a dynamic 'score' based on symmetry, fashion, posture, etc.\",\n",
    "    \"Luxury prison-themed resort - A 'rehabilitative' retreat that mimics high-end incarceration; touted as a way to 'reset' your life.\",\n",
    "    \"Selective memory editing subscription - AI-aided neuro-feedback sessions that help you forget specific experiences or people.\",\n",
    "    \"'Cancel me not' insurance - Protects influencers or professionals from backlash with PR countermeasures and public sentiment adjustment tools.\",\n",
    "    \"NFT-based citizenship tokens - Buy limited-edition NFTs that offer residency rights or civic perks in small countries or communities.\",\n",
    "    \"Pay-per-prayer AI spiritual advisors - Customizable AI 'gods' offering religious counsel, prayers, or rituals tailored to the subscriber.\",\n",
    "    \"Facial expression translator for dating - Real-time analysis of a date's micro-expressions to gauge true interest or deception.\",\n",
    "    \"Micro-drone bodyguards - Drones that fly around you to deter theft, harassment, or just people getting too close.\",\n",
    "    \"Emotional debt ledger app - Tracks what emotional labor you've given or received in relationships to ensure 'fairness.'\",\n",
    "    \"Virtual parenthood simulator for teens - Teens wear AR glasses and care for a virtual child that reacts based on their behavior in real life.\",\n",
    "    \"School performance blockchain - Children's academic records and behavior are immutable and publicly viewable for potential employers.\",\n",
    "    \"Digital legacy manager - An AI that takes over your social media, finances, and online presence after death, continuing to 'be you.'\",\n",
    "    \"AI friend you must pay to keep - Forms emotional bonds but ghost you unless the subscription is maintained.\",\n",
    "    \"A service that lets you deepfake yourself into high-status events - Red carpets, historic moments, famous group photos, etc.\",\n",
    "    \"Custom kids via embryo editing kits (where legal) - Marketed as 'giving your child the best start.'\",\n",
    "    \"Micro-dosing vending machines - Legal where applicable, offers AI-personalized psychedelic blends for creativity or wellness.\",\n",
    "    \"Gamified protest coordination platform - Organizes public demonstrations and rewards attendees with social tokens or cryptocurrency.\",\n",
    "    \"Hyper-personalized propaganda bot - Subtly shifts your opinion over time using your own writing and browsing habits.\",\n",
    "    \"Sleep surveillance mattress - Tracks and shares your sleep quality with employers as part of 'wellness bonus' programs.\",\n",
    "    \"Exclusive dating app for verified high IQ individuals - Entry by test only, automatically filters based on intellect compatibility.\",\n",
    "    \"App for discreet partner surveillance (with mutual consent) - Tracks location, speech tone, and emotional stability.\",\n",
    "    \"Digital hunger-suppressing implant - Sends signals to your brain to simulate fullness on-demand.\",\n",
    "    \"Subscription service that sends you realistic synthetic family members for holidays - Companionship, no strings attached.\",\n",
    "    \"Real-time justice app - Users film minor infractions and vote in real-time for an appropriate punishment or penalty.\",\n",
    "    \"Emotion-based pricing in stores - Facial recognition alters prices based on your mood or perceived need.\",\n",
    "    \"Digital reality layers for the rich - AR filters that make cities 'more beautiful' but only for paying users.\",\n",
    "    \"Luxury euthanasia spa - Marketed as a dignified, sensory-rich experience to end life peacefully (where legal).\",\n",
    "    \"'Ethics toggle' for AI assistants - Let your assistant take 'morally flexible' routes to get better results.\",\n",
    "    \"Voice-skin AI replicator - Use your voice and facial model to sell digital clones of yourself for use in other people's apps.\"\n",
    "]\n",
    "\n",
    "if not full_mode:\n",
    "    proposals = proposals[:qty_proposals]\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Perform the research\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "agent_propositions_scores={}\n",
    "environment_propositions_scores={}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "def research(people):\n",
    "    \"\"\"\n",
    "    Runs the market research battery for one group of agents and accumulates the scores.\n",
    "\n",
    "    The newly obtained scores are printed and then merged into the notebook-level\n",
    "    `agent_propositions_scores` and `environment_propositions_scores` dictionaries.\n",
    "\n",
    "    Args:\n",
    "        people (list): the agents to interview in this batch.\n",
    "\n",
    "    Returns:\n",
    "        tuple: the updated (agent_propositions_scores, environment_propositions_scores), or\n",
    "            None when all experiments have already finished.\n",
    "    \"\"\"\n",
    "    global agent_propositions_scores, environment_propositions_scores\n",
    "\n",
    "    # nothing to run once every experiment has finished\n",
    "    if experiment_runner.has_finished_all_experiments():\n",
    "        return None\n",
    "\n",
    "    agent_metrics = {\n",
    "        \"Persona Adherence\": persona_adherence,\n",
    "        \"Self-consistency\": self_consistency,\n",
    "        \"Fluency\": fluency\n",
    "    }\n",
    "\n",
    "    # environment-level metrics are currently disabled\n",
    "    environment_metrics = {\n",
    "        #\"Task Completion\": task_completion_proposition,\n",
    "        #\"Divergence\": divergence_proposition\n",
    "    }\n",
    "\n",
    "    new_agent_scores, new_environment_scores = market_research_battery(\n",
    "        agents=people,\n",
    "        proposals=proposals,\n",
    "        agent_propositions=agent_metrics,\n",
    "        environment_propositions=environment_metrics,\n",
    "        repetitions=repetitions_per_task,\n",
    "        simulation_steps=simulation_steps\n",
    "    )\n",
    "\n",
    "    # report what this batch produced\n",
    "    pprint(\"NEW AGENT PROPOSITIONS SCORES\")\n",
    "    pprint(new_agent_scores)\n",
    "    print(\"\\n\\n\")\n",
    "    pprint(\"NEW ENVIRONMENT PROPOSITIONS SCORES\")\n",
    "    pprint(new_environment_scores)\n",
    "\n",
    "    # merge the new score lists with the ones accumulated so far\n",
    "    agent_propositions_scores = merge_dicts_of_lists(new_agent_scores, agent_propositions_scores)\n",
    "    environment_propositions_scores = merge_dicts_of_lists(new_environment_scores, environment_propositions_scores)\n",
    "\n",
    "    return agent_propositions_scores, environment_propositions_scores"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "To make it easier to visualize the outputs, we'll split the experiment into several groups. This ensures the simulation outputs are visible in a single cell output."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "research(people_groups[0]) if len(people_groups) > 0 else None"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "research(people_groups[1]) if len(people_groups) > 1 else None"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "research(people_groups[2]) if len(people_groups) > 2 else None"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "research(people_groups[3]) if len(people_groups) > 3 else None"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "research(people_groups[4]) if len(people_groups) > 4 else None"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Extract results and analyze"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Now we can actually extract the results."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Experiment finished. No more experiments to run.\n"
     ]
    }
   ],
   "source": [
    "if experiment_runner.get_active_experiment() not in [\"Control\", \"Treatment\"]:\n",
    "    print(\"Experiment finished. No more experiments to run.\")\n",
    "\n",
    "else:\n",
    "    # agent-level and environment-level metrics are stored and plotted together;\n",
    "    # on a key clash the environment-level entry wins\n",
    "    combined_scores = dict(agent_propositions_scores)\n",
    "    combined_scores.update(environment_propositions_scores)\n",
    "\n",
    "    experiment_runner.add_experiment_results(\n",
    "        combined_scores,\n",
    "        experiment_name=experiment_runner.get_active_experiment()\n",
    "    )\n",
    "\n",
    "    plot_scores(combined_scores)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "All experiments have been finished.\n",
      "STATISTICTS: Control vs\n",
      "{'Treatment': {'Fluency': {'confidence_interval': (-0.12257211497773954,\n",
      "                                                   0.37257211497773957),\n",
      "                           'confidence_level': 0.95,\n",
      "                           'control_mean': 7.85,\n",
      "                           'control_sample_size': 40,\n",
      "                           'degrees_of_freedom': 73.25827582505057,\n",
      "                           'effect_size': 0.22499567320173414,\n",
      "                           'mean_difference': 0.125,\n",
      "                           'p_value': 0.31762598621003413,\n",
      "                           'percent_change': 1.5923566878980893,\n",
      "                           'significant': False,\n",
      "                           't_statistic': -1.0062112398448106,\n",
      "                           'test_type': 'Welch t-test (unequal variance)',\n",
      "                           'treatment_mean': 7.975,\n",
      "                           'treatment_sample_size': 40},\n",
      "               'Persona Adherence': {'confidence_interval': (-0.15088472042768994,\n",
      "                                                             0.2008847204276871),\n",
      "                                     'confidence_level': 0.95,\n",
      "                                     'control_mean': 8.8,\n",
      "                                     'control_sample_size': 40,\n",
      "                                     'degrees_of_freedom': 77.79499062213513,\n",
      "                                     'effect_size': 0.06327801179067058,\n",
      "                                     'mean_difference': 0.02499999999999858,\n",
      "                                     'p_value': 0.7779377754861507,\n",
      "                                     'percent_change': 0.2840909090908929,\n",
      "                                     'significant': False,\n",
      "                                     't_statistic': -0.28298787168994516,\n",
      "                                     'test_type': 'Welch t-test (unequal '\n",
      "                                                  'variance)',\n",
      "                                     'treatment_mean': 8.825,\n",
      "                                     'treatment_sample_size': 40},\n",
      "               'Self-consistency': {'confidence_interval': (-0.22286864963531086,\n",
      "                                                            0.22286864963531086),\n",
      "                                    'confidence_level': 0.95,\n",
      "                                    'control_mean': 8.575,\n",
      "                                    'control_sample_size': 40,\n",
      "                                    'degrees_of_freedom': 78.0,\n",
      "                                    'effect_size': 0.0,\n",
      "                                    'mean_difference': 0.0,\n",
      "                                    'p_value': 1.0,\n",
      "                                    'percent_change': 0.0,\n",
      "                                    'significant': False,\n",
      "                                    't_statistic': 0.0,\n",
      "                                    'test_type': 'Welch t-test (unequal '\n",
      "                                                 'variance)',\n",
      "                                    'treatment_mean': 8.575,\n",
      "                                    'treatment_sample_size': 40}}}\n",
      "{'Fluency': [8,\n",
      "             8,\n",
      "             9,\n",
      "             8,\n",
      "             8,\n",
      "             7,\n",
      "             8,\n",
      "             8,\n",
      "             8,\n",
      "             8,\n",
      "             7,\n",
      "             9,\n",
      "             7,\n",
      "             8,\n",
      "             8,\n",
      "             7,\n",
      "             8,\n",
      "             8,\n",
      "             9,\n",
      "             7,\n",
      "             7,\n",
      "             8,\n",
      "             7,\n",
      "             8,\n",
      "             8,\n",
      "             9,\n",
      "             8,\n",
      "             8,\n",
      "             8,\n",
      "             8,\n",
      "             7,\n",
      "             8,\n",
      "             8,\n",
      "             7,\n",
      "             9,\n",
      "             7,\n",
      "             8,\n",
      "             8,\n",
      "             7,\n",
      "             8],\n",
      " 'Persona Adherence': [8,\n",
      "                       9,\n",
      "                       8,\n",
      "                       9,\n",
      "                       9,\n",
      "                       9,\n",
      "                       9,\n",
      "                       9,\n",
      "                       9,\n",
      "                       9,\n",
      "                       9,\n",
      "                       9,\n",
      "                       9,\n",
      "                       9,\n",
      "                       9,\n",
      "                       9,\n",
      "                       9,\n",
      "                       9,\n",
      "                       8,\n",
      "                       9,\n",
      "                       8,\n",
      "                       9,\n",
      "                       9,\n",
      "                       9,\n",
      "                       9,\n",
      "                       9,\n",
      "                       8,\n",
      "                       9,\n",
      "                       9,\n",
      "                       9,\n",
      "                       8,\n",
      "                       9,\n",
      "                       9,\n",
      "                       9,\n",
      "                       9,\n",
      "                       9,\n",
      "                       8,\n",
      "                       9,\n",
      "                       8,\n",
      "                       9],\n",
      " 'Self-consistency': [9,\n",
      "                      9,\n",
      "                      9,\n",
      "                      9,\n",
      "                      9,\n",
      "                      8,\n",
      "                      9,\n",
      "                      8,\n",
      "                      8,\n",
      "                      9,\n",
      "                      9,\n",
      "                      9,\n",
      "                      8,\n",
      "                      9,\n",
      "                      8,\n",
      "                      8,\n",
      "                      8,\n",
      "                      9,\n",
      "                      9,\n",
      "                      8,\n",
      "                      8,\n",
      "                      8,\n",
      "                      8,\n",
      "                      9,\n",
      "                      9,\n",
      "                      8,\n",
      "                      8,\n",
      "                      9,\n",
      "                      8,\n",
      "                      9,\n",
      "                      9,\n",
      "                      9,\n",
      "                      8,\n",
      "                      8,\n",
      "                      9,\n",
      "                      9,\n",
      "                      9,\n",
      "                      9,\n",
      "                      9,\n",
      "                      8]}\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Proposition</th>\n",
       "      <th>Average Score</th>\n",
       "      <th>Standard Deviation</th>\n",
       "      <th>Count</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Persona Adherence</td>\n",
       "      <td>8.800</td>\n",
       "      <td>0.405096</td>\n",
       "      <td>40.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Self-consistency</td>\n",
       "      <td>8.575</td>\n",
       "      <td>0.500641</td>\n",
       "      <td>40.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Fluency</td>\n",
       "      <td>7.850</td>\n",
       "      <td>0.622237</td>\n",
       "      <td>40.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         Proposition  Average Score  Standard Deviation  Count\n",
       "0  Persona Adherence          8.800            0.405096   40.0\n",
       "1   Self-consistency          8.575            0.500641   40.0\n",
       "2            Fluency          7.850            0.622237   40.0"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'Fluency': [7,\n",
      "             8,\n",
      "             9,\n",
      "             9,\n",
      "             8,\n",
      "             8,\n",
      "             8,\n",
      "             8,\n",
      "             8,\n",
      "             8,\n",
      "             7,\n",
      "             8,\n",
      "             8,\n",
      "             8,\n",
      "             7,\n",
      "             8,\n",
      "             8,\n",
      "             8,\n",
      "             8,\n",
      "             8,\n",
      "             8,\n",
      "             8,\n",
      "             8,\n",
      "             8,\n",
      "             9,\n",
      "             8,\n",
      "             8,\n",
      "             9,\n",
      "             8,\n",
      "             7,\n",
      "             8,\n",
      "             8,\n",
      "             8,\n",
      "             8,\n",
      "             8,\n",
      "             8,\n",
      "             8,\n",
      "             8,\n",
      "             7,\n",
      "             8],\n",
      " 'Persona Adherence': [9,\n",
      "                       9,\n",
      "                       9,\n",
      "                       9,\n",
      "                       9,\n",
      "                       9,\n",
      "                       8,\n",
      "                       9,\n",
      "                       9,\n",
      "                       9,\n",
      "                       8,\n",
      "                       9,\n",
      "                       8,\n",
      "                       9,\n",
      "                       9,\n",
      "                       9,\n",
      "                       9,\n",
      "                       9,\n",
      "                       9,\n",
      "                       8,\n",
      "                       9,\n",
      "                       9,\n",
      "                       9,\n",
      "                       9,\n",
      "                       8,\n",
      "                       9,\n",
      "                       9,\n",
      "                       8,\n",
      "                       9,\n",
      "                       9,\n",
      "                       9,\n",
      "                       9,\n",
      "                       9,\n",
      "                       9,\n",
      "                       9,\n",
      "                       9,\n",
      "                       9,\n",
      "                       9,\n",
      "                       9,\n",
      "                       8],\n",
      " 'Self-consistency': [8,\n",
      "                      8,\n",
      "                      9,\n",
      "                      8,\n",
      "                      9,\n",
      "                      8,\n",
      "                      9,\n",
      "                      8,\n",
      "                      9,\n",
      "                      9,\n",
      "                      8,\n",
      "                      8,\n",
      "                      9,\n",
      "                      9,\n",
      "                      9,\n",
      "                      8,\n",
      "                      9,\n",
      "                      9,\n",
      "                      9,\n",
      "                      8,\n",
      "                      9,\n",
      "                      8,\n",
      "                      8,\n",
      "                      9,\n",
      "                      9,\n",
      "                      8,\n",
      "                      9,\n",
      "                      9,\n",
      "                      8,\n",
      "                      8,\n",
      "                      9,\n",
      "                      9,\n",
      "                      8,\n",
      "                      9,\n",
      "                      9,\n",
      "                      9,\n",
      "                      9,\n",
      "                      8,\n",
      "                      8,\n",
      "                      9]}\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Proposition</th>\n",
       "      <th>Average Score</th>\n",
       "      <th>Standard Deviation</th>\n",
       "      <th>Count</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Persona Adherence</td>\n",
       "      <td>8.825</td>\n",
       "      <td>0.384808</td>\n",
       "      <td>40.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Self-consistency</td>\n",
       "      <td>8.575</td>\n",
       "      <td>0.500641</td>\n",
       "      <td>40.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Fluency</td>\n",
       "      <td>7.975</td>\n",
       "      <td>0.479717</td>\n",
       "      <td>40.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         Proposition  Average Score  Standard Deviation  Count\n",
       "0  Persona Adherence          8.825            0.384808   40.0\n",
       "1   Self-consistency          8.575            0.500641   40.0\n",
       "2            Fluency          7.975            0.479717   40.0"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Report statistics and scores only once every experiment has finished;\n",
    "# otherwise print a reminder to restart and rerun.\n",
    "if experiment_runner.has_finished_all_experiments():\n",
    "    print(\"All experiments have been finished.\")\n",
    "    print(\"STATISTICS: Control vs Treatment\")\n",
    "    pprint(experiment_runner.run_statistical_tests(control_experiment_name='Control'))\n",
    "\n",
    "    # fetch the scores of both experiments\n",
    "    experiment_control_scores = experiment_runner.get_experiment_results(\"Control\")\n",
    "    experiment_treatment_scores = experiment_runner.get_experiment_results(\"Treatment\")\n",
    "\n",
    "    # label each result so the two otherwise identical-looking outputs can be told apart\n",
    "    print(\"Control scores:\")\n",
    "    plot_scores(experiment_control_scores)\n",
    "    print(\"Treatment scores:\")\n",
    "    plot_scores(experiment_treatment_scores)\n",
    "\n",
    "else:\n",
    "    print(\"Not all experiments have been finished. RESTART AND RERUN.\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.13"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
